{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Initialization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For Colab only!\n",
    "\n",
    "try:\n",
    "  # %tensorflow_version only exists in Colab.\n",
    "  %tensorflow_version 2.x\n",
    "except Exception:\n",
    "  pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.nn import functional as F"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "cell_style": "split",
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.1.0\n",
      "WARNING:tensorflow:From <ipython-input-3-2e82a26757ae>:2: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use `tf.config.list_physical_devices('GPU')` instead.\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "print(tf.__version__)\n",
    "print(tf.test.is_gpu_available())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.4.0\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "print(torch.__version__)\n",
    "print(torch.cuda.is_available())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Simple Gradient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def func(x):\n",
    "    return x**2 + x**3 + 5\n",
    "\n",
    "# df(x)/dx = 2x + 3x**2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor([16.], shape=(1,), dtype=float32)\n"
     ]
    }
   ],
   "source": [
    "x = tf.Variable([2.])\n",
    "\n",
    "with tf.GradientTape() as tape:\n",
    "    y = func(x)\n",
    "    \n",
    "grad = tape.gradient(y, [x])\n",
    "print(grad[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([16.])\n"
     ]
    }
   ],
   "source": [
    "x = torch.tensor([2.], requires_grad = True)\n",
    "\n",
    "y = func(x)\n",
    "\n",
    "grad = torch.autograd.grad(y, [x])\n",
    "print(grad[0])\n",
    "\n",
    "# y.backward()\n",
    "# print(x.grad)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### MSE gradient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "def one_hot(label, depth):\n",
    "    out = torch.zeros(label.size(0), depth)\n",
    "    idx = torch.LongTensor(label).view(-1, 1)\n",
    "    out.scatter_(dim=1, index=idx, value=1)\n",
    "    return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor(0.23290308, shape=(), dtype=float32)\n",
      "tf.Tensor(\n",
      "[[ 0.00120118 -0.00120119]\n",
      " [ 0.01929211 -0.01929212]\n",
      " [ 0.03523264 -0.03523265]\n",
      " [ 0.04114018 -0.04114018]], shape=(4, 2), dtype=float32)\n",
      "tf.Tensor([ 0.07563752 -0.07563753], shape=(2,), dtype=float32)\n"
     ]
    }
   ],
   "source": [
    "# Example: [3,4] linear conversion ->[3,2]\n",
    "#  y = x@w +b  x:[3,4] w:[4,2] b:[2], y:[3]\n",
    "#  y one-hot depth = 2\n",
    "\n",
    "x = tf.random.uniform([3,4])\n",
    "w = tf.random.uniform([4,2])\n",
    "b = tf.zeros([2])\n",
    "y = tf.constant([0, 1, 1])\n",
    "\n",
    "with tf.GradientTape() as tape:\n",
    "    # if the tensors are not variables\n",
    "    tape.watch([w,b])\n",
    "    \n",
    "    logits = x @ w + b\n",
    "    probs = tf.nn.softmax(logits)\n",
    "    \n",
    "    y_true = tf.one_hot(y, depth=2)\n",
    "    \n",
    "    losses = tf.losses.MSE(y_true,probs)\n",
    "    loss = tf.reduce_mean(losses)\n",
    "    \n",
    "grads = tape.gradient(loss, [w,b])\n",
    "\n",
    "grads_w = grads[0]\n",
    "grads_b = grads[1]\n",
    "\n",
    "print(loss)\n",
    "print(grads[0])\n",
    "print(grads[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "cell_style": "split",
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(0.2377, grad_fn=<MeanBackward0>)\n",
      "tensor([[-0.0021,  0.0021],\n",
      "        [ 0.0981, -0.0981],\n",
      "        [ 0.0548, -0.0548],\n",
      "        [ 0.0253, -0.0253]])\n",
      "tensor([ 0.0360, -0.0360])\n"
     ]
    }
   ],
   "source": [
    "# Example: [3,4] linear conversion ->[3,2]\n",
    "#  y = x@w +b  x:[3,4] w:[4,2] b:[2], y:[3]\n",
    "#  y one-hot depth = 2\n",
    "\n",
    "x = torch.rand(3,4)\n",
    "w = torch.rand([4,2], requires_grad=True)\n",
    "b = torch.zeros([2], requires_grad=True)\n",
    "y = torch.LongTensor([0, 1, 1])\n",
    "\n",
    "# if \"requires_grad=Flase\"\n",
    "# w.requires_grad_()\n",
    "# b.requires_grad_()\n",
    "\n",
    "logits = x @ w +b\n",
    "probs = F.softmax(logits, dim = 1)\n",
    "\n",
    "y_true = one_hot(y, depth=2)\n",
    "loss = F.mse_loss(y_true, probs)\n",
    "\n",
    "\n",
    "\n",
    "grads = torch.autograd.grad(loss, [w, b])\n",
    "\n",
    "grads_w = grads[0]\n",
    "grads_b = grads[1]\n",
    "\n",
    "\n",
    "print(loss)\n",
    "print(grads_w)\n",
    "print(grads_b)\n",
    "\n",
    "# Alternative way:\n",
    "\n",
    "# loss.backward()\n",
    "# print(w.grad)\n",
    "# print(b.grad)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sorftmax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[<tf.Tensor: shape=(3, 3), dtype=float32, numpy=\n",
      "array([[0.0000000e+00, 0.0000000e+00, 0.0000000e+00],\n",
      "       [1.2822345e-08, 2.3484551e-08, 2.3297744e-08],\n",
      "       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00]], dtype=float32)>]\n"
     ]
    }
   ],
   "source": [
    "logits = tf.random.uniform([3,3])\n",
    "logits = tf.Variable(logits)\n",
    "\n",
    "with tf.GradientTape() as tape:\n",
    "#     tape.watch([logits])\n",
    "    \n",
    "    probs = tf.nn.softmax(logits, axis=1)\n",
    "    \n",
    "grads = tape.gradient(probs, [logits])\n",
    "\n",
    "\n",
    "# print(logits)\n",
    "# print(probs)\n",
    "\n",
    "# print(probs[0][0])\n",
    "print(grads)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[0.0512, 0.2323, 0.1603],\n",
      "        [0.4415, 0.2883, 0.0061],\n",
      "        [0.3152, 0.3054, 0.9709]], requires_grad=True)\n",
      "tensor([[0.3018, 0.3617, 0.3366],\n",
      "        [0.3992, 0.3425, 0.2583],\n",
      "        [0.2553, 0.2528, 0.4919]], grad_fn=<SoftmaxBackward>)\n",
      "(tensor([[ 0.2107, -0.1091, -0.1016],\n",
      "        [ 0.0000,  0.0000,  0.0000],\n",
      "        [ 0.0000,  0.0000,  0.0000]]),)\n"
     ]
    }
   ],
   "source": [
    "# logist: [b, 3], probs: [b, 3]\n",
    "logits = torch.rand(3,3)\n",
    "logits.requires_grad_()\n",
    "\n",
    "probs = F.softmax(logits, dim = 1)\n",
    "\n",
    "print(logits)\n",
    "print(probs)\n",
    "\n",
    "grad_0_0 = torch.autograd.grad(probs[0][0], logits, retain_graph=True)\n",
    "print(grad_0_0)\n",
    "\n",
    "# probs[0][0].backward(retain_graph=True)\n",
    "# print(logits.grad)\n",
    "\n",
    "# probs[1][1].backward(retain_graph=True)\n",
    "# print(logits.grad)\n",
    "\n",
    "# probs[2][2].backward(retain_graph=True)\n",
    "# print(logits.grad)\n",
    "\n",
    "# probs[0][1].backward(retain_graph=True)\n",
    "# print(logits.grad)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Crossentropy gradient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor(0.7021515, shape=(), dtype=float32)\n",
      "tf.Tensor(\n",
      "[[-0.12753755  0.12753753]\n",
      " [ 0.11524016 -0.11524017]\n",
      " [-0.03575598  0.03575598]\n",
      " [ 0.02847505 -0.02847505]], shape=(4, 2), dtype=float32)\n",
      "tf.Tensor([ 0.10340744 -0.10340746], shape=(2,), dtype=float32)\n"
     ]
    }
   ],
   "source": [
    "# Example: [3,4] linear conversion ->[3,2]\n",
    "#  y = x*w +c  x:[3,4] w:[4,2] b:[2], y:[3]\n",
    "#  y one-hot depth = 2\n",
    "\n",
    "x = tf.random.uniform([3,4])\n",
    "w = tf.random.uniform([4,2])\n",
    "b = tf.zeros([2])\n",
    "y = tf.constant([0, 1, 1])\n",
    "\n",
    "with tf.GradientTape() as tape:\n",
    "    tape.watch([w,b])\n",
    "    y_true = tf.one_hot(y, depth=2)\n",
    "    logits = x@w + b\n",
    "    losses = tf.losses.categorical_crossentropy(y_true, \n",
    "                                                logits, \n",
    "                                                from_logits=True)\n",
    "    loss = tf.reduce_mean(losses)\n",
    "    \n",
    "grads = tape.gradient(loss, [w,b])  \n",
    "\n",
    "grad_w = grads[0]\n",
    "grad_b = grads[1]\n",
    "\n",
    "print(loss)\n",
    "print(grad_w)\n",
    "print(grad_b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(0.6820, grad_fn=<NllLossBackward>)\n",
      "tensor([[ 0.0685, -0.0685],\n",
      "        [ 0.0468, -0.0468],\n",
      "        [ 0.0173, -0.0173],\n",
      "        [ 0.0620, -0.0620]])\n",
      "tensor([ 0.1484, -0.1484])\n"
     ]
    }
   ],
   "source": [
    "# Example: [3,4] linear conversion ->[3,2]\n",
    "#  y = x*w +c  x:[3,4] w:[4,2] b:[2], y:[3]\n",
    "#  y one-hot not requried here\n",
    "\n",
    "x = torch.rand(3,4)\n",
    "w = torch.rand(4,2, requires_grad=True)\n",
    "b = torch.zeros(2, requires_grad=True)\n",
    "y = torch.LongTensor([0, 1, 1])\n",
    "\n",
    "logits = x@w + b\n",
    "# must use logits rather than probs, one-hot encoding not required\n",
    "loss = F.cross_entropy(logits, y)\n",
    "\n",
    "loss.backward()\n",
    "\n",
    "print(loss)\n",
    "print(w.grad)\n",
    "print(b.grad)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Chain Rule"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor([0.20194398], shape=(1,), dtype=float32)\n",
      "tf.Tensor([ True], shape=(1,), dtype=bool)\n"
     ]
    }
   ],
   "source": [
    "x1 = tf.random.uniform([1])\n",
    "w1 = tf.random.uniform([1])\n",
    "b1 = tf.random.uniform([1])\n",
    "\n",
    "w2 = tf.random.uniform([1])\n",
    "b2 = tf.random.uniform([1])\n",
    "\n",
    "with tf.GradientTape(persistent=True) as tape:\n",
    "    tape.watch([w1,b1,w2,b2])\n",
    "    \n",
    "    y1 = x1*w1 + b1\n",
    "    y2 = y1*w2 + b2\n",
    "    \n",
    "[dy1_dw1] = tape.gradient(y1, [w1])\n",
    "[dy2_dy1] = tape.gradient(y2, [y1])\n",
    "     \n",
    "\n",
    "[dy2_dw1] = tape.gradient(y2, [w1])\n",
    "print(dy2_dw1)\n",
    "print(dy2_dw1 == dy2_dy1 * dy1_dw1) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([0.6876])\n",
      "tensor([True])\n"
     ]
    }
   ],
   "source": [
    "x1 = torch.rand(1)\n",
    "w1 = torch.rand(1, requires_grad=True)\n",
    "b1 = torch.rand(1, requires_grad=True)\n",
    "\n",
    "w2 = torch.rand(1, requires_grad=True)\n",
    "b2 = torch.rand(1, requires_grad=True)\n",
    "\n",
    "y1 = x1*w1 + b1\n",
    "y2 = y1*w2 + b2\n",
    "\n",
    "(dy1_dw1,) = torch.autograd.grad(y1, w1, retain_graph=True)\n",
    "\n",
    "(dy2_dy1,) = torch.autograd.grad(y2, y1, retain_graph=True)\n",
    "\n",
    "\n",
    "(dy2_dw1,) = torch.autograd.grad(y2, w1)\n",
    "print(dy2_dw1)\n",
    "print(dy2_dy1 * dy1_dw1 == dy2_dw1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor(\n",
      "[[17.540981 17.540981 17.540981]\n",
      " [18.754086 18.754086 18.754086]\n",
      " [ 8.839819  8.839819  8.839819]\n",
      " [19.85136  19.85136  19.85136 ]], shape=(4, 3), dtype=float32)\n",
      "tf.Tensor(\n",
      "[[16.694921  8.69648  17.586002]\n",
      " [16.694921  8.69648  17.586002]\n",
      " [16.694921  8.69648  17.586002]], shape=(3, 3), dtype=float32)\n",
      "tf.Tensor(292.8453, shape=(), dtype=float32)\n",
      "tf.Tensor(292.84534, shape=(), dtype=float32)\n"
     ]
    }
   ],
   "source": [
    "x1 = tf.random.uniform([3,4])*10\n",
    "w1 = tf.random.uniform([4,3])*10\n",
    "b1 = tf.zeros([3])\n",
    "\n",
    "w2 = tf.random.uniform([3,2])*10\n",
    "b2 = tf.zeros([2])\n",
    "\n",
    "with tf.GradientTape(persistent=True) as tape:\n",
    "    tape.watch([w1,b1,w2,b2])\n",
    "    \n",
    "    y1 = tf.nn.relu(x1@w1 + b1)\n",
    "    y2 = tf.nn.relu(y1@w2 + b2)\n",
    "    \n",
    "[dy1_dw1] = tape.gradient(y1, [w1])\n",
    "[dy2_dy1] = tape.gradient(y2, [y1])\n",
    "     \n",
    "print(dy1_dw1)\n",
    "print(dy2_dy1)\n",
    "\n",
    "print(dy2_dy1[0][0] * dy1_dw1[0][0])\n",
    "\n",
    "[dy2_dw1] = tape.gradient(y2, [w1])\n",
    "\n",
    "print(dy2_dw1[0][0])\n",
    "\n",
    "# print(dy2_dw1[0][0] == dy2_dy1[0][0] * dy1_dw1[0][0]) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[-0.1262,  0.0498,  0.0815],\n",
      "        [ 0.0000,  0.0000,  0.0000],\n",
      "        [ 0.0000,  0.0000,  0.0000]])\n",
      "tensor([[0.3605, 0.0000, 0.0000],\n",
      "        [0.4910, 0.0000, 0.0000],\n",
      "        [0.8196, 0.0000, 0.0000],\n",
      "        [0.7047, 0.0000, 0.0000]])\n",
      "tensor(-0.0455)\n",
      "tensor(-0.0455)\n",
      "tensor(True)\n"
     ]
    }
   ],
   "source": [
    "x1 = torch.rand(3,4)\n",
    "w1 = torch.rand(4,3, requires_grad=True)\n",
    "b1 = torch.zeros(3, requires_grad=True)\n",
    "\n",
    "w2 = torch.rand(3,2, requires_grad=True)\n",
    "b2 = torch.zeros(2, requires_grad=True)\n",
    "\n",
    "y1 = F.relu(x1@w1 + b1)\n",
    "y2 = F.softmax(y1@w2 + b2, dim = 1)\n",
    "\n",
    "(dy1_dw1,) = torch.autograd.grad(y1[0][0], w1, retain_graph=True)\n",
    "\n",
    "(dy2_dy1,) = torch.autograd.grad(y2[0][0], y1, retain_graph=True)\n",
    "\n",
    "print(dy2_dy1)\n",
    "print(dy1_dw1)\n",
    "\n",
    "print(dy2_dy1[0][0] * dy1_dw1[0][0])\n",
    "\n",
    "(dy2_dw1,)= torch.autograd.grad(y2[0][0], w1)\n",
    "print(dy2_dw1[0][0])\n",
    "\n",
    "print(dy2_dy1[0][0] * dy1_dw1[0][0] == dy2_dw1[0][0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Opimization\n",
    "\n",
    "**Himmelblau function**\n",
    "\n",
    "\\begin{equation}\n",
    "f(x, y)=\\left(x^{2}+y-11\\right)^{2}+\\left(x+y^{2}-7\\right)^{2}\n",
    "\\end{equation}\n",
    "\n",
    "zero points: [3, 2],  [-2.805118, 3.131312],  [-3.779310, -3.283186],  [3.583328, -1.848126]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x,y range: (120,) (120,)\n",
      "X,Y maps: (120, 120) (120, 120)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import  numpy as np\n",
    "from    mpl_toolkits.mplot3d import Axes3D\n",
    "from    matplotlib import pyplot as plt\n",
    "\n",
    "\n",
    "\n",
    "def himmelblau(x):\n",
    "    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2\n",
    "\n",
    "\n",
    "x = np.arange(-6, 6, 0.1)\n",
    "y = np.arange(-6, 6, 0.1)\n",
    "print('x,y range:', x.shape, y.shape)\n",
    "X, Y = np.meshgrid(x, y)\n",
    "print('X,Y maps:', X.shape, Y.shape)\n",
    "Z = himmelblau([X, Y])\n",
    "\n",
    "fig = plt.figure('himmelblau')\n",
    "ax = fig.gca(projection='3d')\n",
    "ax.plot_surface(X, Y, Z)\n",
    "ax.view_init(60, -30)\n",
    "ax.set_xlabel('x')\n",
    "ax.set_ylabel('y')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Without optimizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0: x = [0.014 0.022], pred = 170.0\n",
      "step 2000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 4000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 6000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 8000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 10000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 12000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 14000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 16000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 18000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 20000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 22000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 24000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 26000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n",
      "step 28000: x = [2.9999971 2.000005 ], pred = 4.483808879740536e-10\n"
     ]
    }
   ],
   "source": [
    "x = tf.Variable([0.,0.])\n",
    "\n",
    "lr = 0.001\n",
    "\n",
    "for step in range(30000):\n",
    "    \n",
    "    with tf.GradientTape() as tape:\n",
    "        pred = himmelblau(x)\n",
    "    \n",
    "    grads = tape.gradient(pred, [x])\n",
    "    x.assign_sub(lr * grads[0])\n",
    "\n",
    "    if(step % 2000 == 0):\n",
    "        print('step {}: x = {}, pred = {}'\n",
    "             .format(step, x.numpy(), pred.numpy()))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0: x = [0.014000000432133675, 0.02200000174343586], pred = 170.0\n",
      "step 2000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 4000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 6000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 8000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 10000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 12000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 14000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 16000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 18000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 20000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 22000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 24000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 26000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n",
      "step 28000: x = [2.999997138977051, 2.000005006790161], pred = 4.483808879740536e-10\n"
     ]
    }
   ],
   "source": [
    "x = torch.tensor([0.,0.], requires_grad=True)\n",
    "\n",
    "lr = 0.001\n",
    "\n",
    "for step in range(30000):\n",
    "    \n",
    "    pred = himmelblau(x)\n",
    "    \n",
    "    grads = torch.autograd.grad(pred, [x])\n",
    "    x.data.sub_(lr * grads[0])\n",
    "    \n",
    "    if(step % 2000 == 0):\n",
    "        print('step {}: x = {}, pred = {}'\n",
    "             .format(step, x.tolist(), pred.item()))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### With Optimizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [],
   "source": [
    "x = tf.Variable([0.,0.])\n",
    "\n",
    "lr = 0.001\n",
    "optimizer = tf.optimizers.SGD(lr)\n",
    "\n",
    "for step in range(30000):\n",
    "    \n",
    "    with tf.GradientTape() as tape:\n",
    "        pred = himmelblau(x)\n",
    "    \n",
    "    grads = tape.gradient(pred, [x])\n",
    "    \n",
    "    optimizer.apply_gradients(grads_and_vars = zip(grads, [x]))\n",
    "\n",
    "    if(step % 2000 == 0):\n",
    "        print('step {}: x = {}, pred = {}'\n",
    "             .format(step, x.numpy(), pred.numpy()))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [],
   "source": [
    "x = torch.tensor([0.,0.], requires_grad=True)\n",
    "\n",
    "lr = 0.001\n",
    "optimizer = torch.optim.SGD([x],lr=lr)\n",
    "\n",
    "for step in range(30000):\n",
    "    \n",
    "    pred = himmelblau(x)\n",
    "    \n",
    "    optimizer.zero_grad()\n",
    "    pred.backward()\n",
    "    optimizer.step()\n",
    "    \n",
    "    if(step % 2000 == 0):\n",
    "        print('step {}: x = {}, pred = {}'\n",
    "             .format(step, x.tolist(), pred.item()))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Linear Regression\n",
    "#### Creating data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 200\n",
    "batches = 20000\n",
    "lr = 0.001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1 Million Points\n",
    "x_data = np.linspace(0.0,10.0,1000000)\n",
    "\n",
    "noise = np.random.randn(len(x_data))\n",
    "\n",
    "# y = wx + b + noise_levels\n",
    "# w = 2.5, b = 10\n",
    "\n",
    "y_true =  (2.5 * x_data ) + 10 + noise\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [],
   "source": [
    "w = tf.Variable([0.])\n",
    "b = tf.Variable([0.])\n",
    "\n",
    "\n",
    "for batch in range(batches):\n",
    "    sample_indx = np.random.randint(len(x_data), size=(batch_size))\n",
    "    x = x_data[sample_indx]\n",
    "    y = y_true[sample_indx]\n",
    "    \n",
    "    with tf.GradientTape() as tape:\n",
    "        y_pred = x*w + b\n",
    "        loss = tf.reduce_mean((y_pred - y)**2)\n",
    "        \n",
    "    grads = tape.gradient(loss, [w,b])\n",
    "    w.assign_sub(lr * grads[0])\n",
    "    b.assign_sub(lr * grads[1])\n",
    "    \n",
    "    if(batch % 1000 == 0):\n",
    "        print(\"batch: {}, loss ={}, w = {}, b = {}\".format(batch,loss.numpy() ,w.numpy(), b.numpy()))\n",
    "\n",
    "\n",
    "plt.plot(x_data[sample_indx],y_true[sample_indx],'*')\n",
    "plt.plot(x_data, w.numpy()*x_data+b.numpy(),'r')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cell_style": "split"
   },
   "outputs": [],
   "source": [
    "w = torch.tensor([1.], requires_grad = True)\n",
    "b = torch.tensor([0.], requires_grad = True)\n",
    "\n",
    "for batch in range(batches):\n",
    "    sample_indx = np.random.randint(len(x_data), size=(batch_size))\n",
    "    x = torch.from_numpy(x_data[sample_indx])\n",
    "    y = torch.from_numpy(y_true[sample_indx])\n",
    "    \n",
    "    y_pred = x*w + b\n",
    "    loss= torch.mean((y_pred -y)**2)\n",
    "    \n",
    "    grads = torch.autograd.grad(loss, [w,b])\n",
    "    w.data.sub_(lr * grads[0])\n",
    "    b.data.sub_(lr * grads[1])\n",
    "    \n",
    "    if(batch % 1000 == 0):\n",
    "        print(\"batch: {},loss = {}, w = {}, b = {}\".format(batch, loss.item(), w.item(), b.item()))\n",
    "    \n",
    "plt.plot(x_data[sample_indx],y_true[sample_indx],'*')\n",
    "plt.plot(x_data, w.item()*x_data+b.item(),'r')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
